library(dplyr)
# For Model fitting
library(lme4)
library(nlme)
library(purrr)
# For diagnostics
library(performance)
# For adding new columns
library(tibble)
# Load data
sys.source("./scripts/code_join_data_full_dataset.R", envir = knitr::knit_global())
# Load functions
## Models
sys.source("./R/functions_models.R", envir = knitr::knit_global())

Select performance and traits variables

Transform variables

Backtranformation info

data_for_models <-
    data_for_models %>%

        # Select variables for analysis
        dplyr::select(c(1:7,9,11:ncol(data_for_models)))

Models: Questions 1 and 2

\[response\sim treatment*fixer\ + initial\ height + random( 1|\ specie)\]

# Take response variables names 
response_vars_q1_q2 <- 
    set_names(names(data_for_models)[5:14])

response_vars_q1_q2 
  total_biomass   above_biomass   below_biomass             rgr            amax 
"total_biomass" "above_biomass" "below_biomass"           "rgr"          "amax" 
             gs             wue            d13c            d15n            pnue 
           "gs"           "wue"          "d13c"          "d15n"          "pnue" 
models_q1_q2 <- map(response_vars_q1_q2, ~ mixed_model_1(response = .x, 
                                                              data = data_for_models))
model_q2_wue_log <- lmer(log(wue) ~ nfixer*treatment + init_height + (1|spcode), 
                         data = data_for_models)

model_q2_pnue_log <- lmer(log(pnue) ~ nfixer*treatment + init_height + (1|spcode), 
                         data = data_for_models) 

log_models <- list(model_q2_wue_log, model_q2_pnue_log)
names(log_models) <- c("wue_log", "pnue_log")
# Append log models to model list
models_q1_q2 <- append(log_models, models_q1_q2)

Models Nodule count

# Load data
# This step was done like this because I am working with a subset of the data
# source cleaned data
source("./scripts/code_clean_data_nodules.R")

# Delete unused variables
data_nodules_cleaned <-
    data_nodules_cleaned %>%
        
        # add id to rownames for keep track of the rows
        column_to_rownames("id") %>% 
        dplyr::select(spcode,treatment, everything())

m4 lmer gaussian

lmer_gaussian <- lmer(number_of_root_nodulation ~ treatment + init_height + 
                           (1 |spcode),
                       data = data_nodules_cleaned)
lmer_gaussian_log <-  lmer(log(number_of_root_nodulation) ~ treatment + init_height + 
                           (1 |spcode),
                       data = data_nodules_cleaned)

m5 glmer poisson

glmer_poisson <-  glmer(number_of_root_nodulation ~ treatment + init_height + 
                           (1 |spcode), family = "poisson",
                       data = data_nodules_cleaned)
models_nodule_count <- list(lmer_gaussian, lmer_gaussian_log, glmer_poisson)
names(models_nodule_count) <- c("lmer_gaussian",
                                     "lmer_gaussian_log",
                                     "glmer_poisson")

Mycorrhizal colonization

I decided not to include it, because I want to focus on Nfixing vrs non-Fixing, 
also I don't trust on the data

Models: Question 3

\[performance\sim treatment:fixer:scale(trait)\ + initial\ height + random( 1|\ specie)\]

Scale preditors

data_for_models_scaled <-
    data_for_models %>% 
        mutate(across(c(4,9:14),scale)) 
# Select traits (x_vars)
traits_names <- 
    set_names(names(data_for_models_scaled)[c(9:14)])

traits_names
  amax     gs    wue   d13c   d15n   pnue 
"amax"   "gs"  "wue" "d13c" "d15n" "pnue" 
# Select plants performance vars (y_vars)
performance_names <- 
    set_names(names(data_for_models_scaled)[c(5:8)])

performance_names
  total_biomass   above_biomass   below_biomass             rgr 
"total_biomass" "above_biomass" "below_biomass"           "rgr" 

Models lme4::lmer

models_lmer_formulas <- model_combinations_formulas(performance_names, traits_names)

length(models_lmer_formulas)
[1] 24
models_lmer_formulas[1]
$`abovebiomass~amax`
above_biomass ~ treatment:nfixer:amax + init_height + (1 | spcode)
<environment: 0x7fa0499be218>
models_q3_lmer <- map(models_lmer_formulas, 
                       ~ lmer(.x, data = data_for_models_scaled))

Models nlme::lme

models_nlme_formulas <- model_combinations_formulas(performance_names, 
                                                    traits_names, 
                                                    nlme = T)

length(models_nlme_formulas)
[1] 24
models_nlme_formulas[1]
$`abovebiomass~amax`
above_biomass ~ treatment:nfixer:amax + init_height
<environment: 0x7fa04ad86228>
models_q3_nlme <- map(models_nlme_formulas, ~model_nlme(., data = data_for_models_scaled))

Validation plots

Collinearity

map(models_q1_q2, check_collinearity)
$wue_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
           nfixer 1.30         1.14      0.77
        treatment 3.85         1.96      0.26
      init_height 1.05         1.02      0.96
 nfixer:treatment 4.57         2.14      0.22

$pnue_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
           nfixer 1.30         1.14      0.77
        treatment 3.85         1.96      0.26
      init_height 1.05         1.02      0.96
 nfixer:treatment 4.55         2.13      0.22

$total_biomass
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.86         1.97      0.26
           nfixer 1.12         1.06      0.90
      init_height 1.06         1.03      0.95
 treatment:nfixer 4.12         2.03      0.24

$above_biomass
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.87         1.97      0.26
           nfixer 1.06         1.03      0.94
      init_height 1.06         1.03      0.94
 treatment:nfixer 3.98         2.00      0.25

$below_biomass
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.86         1.96      0.26
           nfixer 1.16         1.08      0.86
      init_height 1.05         1.03      0.95
 treatment:nfixer 4.22         2.06      0.24

$rgr
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.87         1.97      0.26
           nfixer 1.05         1.02      0.95
      init_height 1.06         1.03      0.94
 treatment:nfixer 3.96         1.99      0.25

$amax
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.88         1.97      0.26
           nfixer 1.03         1.01      0.97
      init_height 1.07         1.03      0.94
 treatment:nfixer 3.90         1.98      0.26

$gs
# Check for Multicollinearity

Low Correlation

        Term  VIF Increased SE Tolerance
   treatment 3.83         1.96      0.26
      nfixer 1.92         1.39      0.52
 init_height 1.03         1.01      0.97

Moderate Correlation

             Term  VIF Increased SE Tolerance
 treatment:nfixer 6.08         2.47      0.16

$wue
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.85         1.96      0.26
           nfixer 1.19         1.09      0.84
      init_height 1.05         1.03      0.95
 treatment:nfixer 4.29         2.07      0.23

$d13c
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.85         1.96      0.26
           nfixer 1.20         1.09      0.84
      init_height 1.05         1.03      0.95
 treatment:nfixer 4.31         2.07      0.23

$d15n
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.84         1.96      0.26
           nfixer 1.41         1.19      0.71
      init_height 1.04         1.02      0.96
 treatment:nfixer 4.84         2.20      0.21

$pnue
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.84         1.96      0.26
           nfixer 1.33         1.15      0.75
      init_height 1.04         1.02      0.96
 treatment:nfixer 4.62         2.15      0.22
# map(models_nodule_count, check_collinearity)
#Warning: Model has interaction terms. VIFs might be inflated. You may check 
#multicollinearity among predictors of a model without interaction terms.

#map(models_list_q3, check_mul)

Bolker’s plots

bolker_validation <- function(model) {
    
    
    a <- plot(model, type = c("p", "smooth"))
    
    ## heteroscedasticity
    b <-  plot(model,sqrt(abs(resid(.))) ~ fitted(.), type = c("p", "smooth"))
   
    cowplot::plot_grid(a,b,nrow = 1)
}

Models for questions 1 and 2

map(models_q1_q2, bolker_validation)
$wue_log


$pnue_log


$total_biomass


$above_biomass


$below_biomass


$rgr


$amax


$gs


$wue


$d13c


$d15n


$pnue

Models for nodule count

map(models_nodule_count, bolker_validation)
$lmer_gaussian


$lmer_gaussian_log


$glmer_poisson

Models for question 3

lme4 models

map(models_q3_lmer, bolker_validation)
$`abovebiomass~amax`


$`abovebiomass~d13c`


$`abovebiomass~d15n`


$`abovebiomass~gs`


$`abovebiomass~pnue`


$`abovebiomass~wue`


$`belowbiomass~amax`


$`belowbiomass~d13c`


$`belowbiomass~d15n`


$`belowbiomass~gs`


$`belowbiomass~pnue`


$`belowbiomass~wue`


$`rgr~amax`


$`rgr~d13c`


$`rgr~d15n`


$`rgr~gs`


$`rgr~pnue`


$`rgr~wue`


$`totalbiomass~amax`


$`totalbiomass~d13c`


$`totalbiomass~d15n`


$`totalbiomass~gs`


$`totalbiomass~pnue`


$`totalbiomass~wue`

nlme models

map(models_q3_nlme, bolker_validation)
$`abovebiomass~amax`


$`abovebiomass~d13c`


$`abovebiomass~d15n`


$`abovebiomass~gs`


$`abovebiomass~pnue`


$`abovebiomass~wue`


$`belowbiomass~amax`


$`belowbiomass~d13c`


$`belowbiomass~d15n`


$`belowbiomass~gs`


$`belowbiomass~pnue`


$`belowbiomass~wue`


$`rgr~amax`


$`rgr~d13c`


$`rgr~d15n`


$`rgr~gs`


$`rgr~pnue`


$`rgr~wue`


$`totalbiomass~amax`


$`totalbiomass~d13c`


$`totalbiomass~d15n`


$`totalbiomass~gs`


$`totalbiomass~pnue`


$`totalbiomass~wue`

Performance package

Models for questions 1,2

map(models_q1_q2, check_model)
$wue_log


$pnue_log


$total_biomass


$above_biomass


$below_biomass


$rgr


$amax


$gs


$wue


$d13c


$d15n


$pnue

Models for nodule count

map(models_nodule_count, check_model)
$lmer_gaussian


$lmer_gaussian_log


$glmer_poisson

Models for question 3

map(models_q3_lmer, check_model)
$`abovebiomass~amax`


$`abovebiomass~d13c`


$`abovebiomass~d15n`


$`abovebiomass~gs`


$`abovebiomass~pnue`


$`abovebiomass~wue`


$`belowbiomass~amax`


$`belowbiomass~d13c`


$`belowbiomass~d15n`


$`belowbiomass~gs`


$`belowbiomass~pnue`


$`belowbiomass~wue`


$`rgr~amax`


$`rgr~d13c`


$`rgr~d15n`


$`rgr~gs`


$`rgr~pnue`


$`rgr~wue`


$`totalbiomass~amax`


$`totalbiomass~d13c`


$`totalbiomass~d15n`


$`totalbiomass~gs`


$`totalbiomass~pnue`


$`totalbiomass~wue`

Save lists with the models

saveRDS(models_q1_q2, file = "./processed_data/models_q1_q2.RData") 
saveRDS(models_q3_lmer, file = "./processed_data/models_q3_3_way_interaction_lmer.RData") 
saveRDS(models_q3_nlme, file = "./processed_data/models_q3_3_way_interaction_nlme.RData")
saveRDS(models_nodule_count, file = "./processed_data/models_nodule_count.RData")